-
Notifications
You must be signed in to change notification settings - Fork 15.2k
SystemZ: Implement copyPhysReg between vr128 and gr128 #90616
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-systemz Author: Matt Arsenault (arsenm) ChangesI have no idea if this is correct and I probably swapped the element ordering somewhere. Full diff: https://github.com/llvm/llvm-project/pull/90616.diff 3 Files Affected:
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 6b75c30943b40a..ced7b38a1d717b 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -840,6 +840,41 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
+ if (SystemZ::GR128BitRegClass.contains(DestReg) &&
+ SystemZ::VR128BitRegClass.contains(SrcReg)) {
+ MCRegister DestH64 = RI.getSubReg(DestReg, SystemZ::subreg_h64);
+ MCRegister DestL64 = RI.getSubReg(DestReg, SystemZ::subreg_l64);
+
+ BuildMI(MBB, MBBI, DL, get(SystemZ::VLGVG), DestL64)
+ .addReg(SrcReg)
+ .addReg(SystemZ::NoRegister)
+ .addImm(0)
+ .addDef(DestReg, RegState::Implicit);
+ BuildMI(MBB, MBBI, DL, get(SystemZ::VLGVG), DestH64)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(SystemZ::NoRegister)
+ .addImm(1);
+ return;
+ }
+
+ if (SystemZ::VR128BitRegClass.contains(DestReg) &&
+ SystemZ::GR128BitRegClass.contains(SrcReg)) {
+ MCRegister SrcH64 = RI.getSubReg(SrcReg, SystemZ::subreg_h64);
+ MCRegister SrcL64 = RI.getSubReg(SrcReg, SystemZ::subreg_l64);
+
+ BuildMI(MBB, MBBI, DL, get(SystemZ::VLVGG), DestReg)
+ .addReg(DestReg, RegState::Undef)
+ .addReg(SrcH64)
+ .addReg(SystemZ::NoRegister)
+ .addImm(0);
+ BuildMI(MBB, MBBI, DL, get(SystemZ::VLVGG), DestReg)
+ .addReg(DestReg)
+ .addReg(SrcL64)
+ .addReg(SystemZ::NoRegister)
+ .addImm(1);
+ return;
+ }
+
// Everything else needs only one instruction.
unsigned Opcode;
if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
diff --git a/llvm/test/CodeGen/SystemZ/copy-phys-reg-gr128-to-vr128.mir b/llvm/test/CodeGen/SystemZ/copy-phys-reg-gr128-to-vr128.mir
new file mode 100644
index 00000000000000..537d5b2ae0df14
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/copy-phys-reg-gr128-to-vr128.mir
@@ -0,0 +1,82 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=s390x-ibm-linux -mcpu=z13 -run-pass=postrapseudos -o - %s | FileCheck %s
+
+---
+name: copy_gr128_to_vr128__r0q_to_v0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0q
+ ; CHECK-LABEL: name: copy_gr128_to_vr128__r0q_to_v0
+ ; CHECK: liveins: $r0q
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $v0 = VLVGG undef $v0, $r0d, $noreg, 0
+ ; CHECK-NEXT: $v0 = VLVGG $v0, $r1d, $noreg, 1
+ ; CHECK-NEXT: Return implicit $v0
+ $v0 = COPY $r0q
+ Return implicit $v0
+...
+
+---
+name: copy_gr128_to_vr128__r0q_to_v0_killed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0q
+ ; CHECK-LABEL: name: copy_gr128_to_vr128__r0q_to_v0_killed
+ ; CHECK: liveins: $r0q
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $v0 = VLVGG undef $v0, $r0d, $noreg, 0
+ ; CHECK-NEXT: $v0 = VLVGG $v0, $r1d, $noreg, 1
+ ; CHECK-NEXT: Return implicit $v0
+ $v0 = COPY killed $r0q
+ Return implicit $v0
+...
+
+---
+name: copy_gr128_to_vr128__r0q_to_v0_undef
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0q
+ ; CHECK-LABEL: name: copy_gr128_to_vr128__r0q_to_v0_undef
+ ; CHECK: liveins: $r0q
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $v0 = KILL undef $r0q
+ ; CHECK-NEXT: Return implicit $v0
+ $v0 = COPY undef $r0q
+ Return implicit $v0
+...
+
+---
+name: copy_gr128_to_vr128__r0q_to_v0_subreg0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0d
+ ; CHECK-LABEL: name: copy_gr128_to_vr128__r0q_to_v0_subreg0
+ ; CHECK: liveins: $r0d
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $v0 = VLVGG undef $v0, $r0d, $noreg, 0
+ ; CHECK-NEXT: $v0 = VLVGG $v0, $r1d, $noreg, 1
+ ; CHECK-NEXT: Return implicit $v0
+ $v0 = COPY $r0q
+ Return implicit $v0
+...
+
+---
+name: copy_gr128_to_vr128__r0q_to_v0_subreg1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r1d
+ ; CHECK-LABEL: name: copy_gr128_to_vr128__r0q_to_v0_subreg1
+ ; CHECK: liveins: $r1d
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $v0 = VLVGG undef $v0, $r0d, $noreg, 0
+ ; CHECK-NEXT: $v0 = VLVGG $v0, $r1d, $noreg, 1
+ ; CHECK-NEXT: Return implicit $v0
+ $v0 = COPY $r0q
+ Return implicit $v0
+...
+
diff --git a/llvm/test/CodeGen/SystemZ/copy-phys-reg-vr128-to-gr128.mir b/llvm/test/CodeGen/SystemZ/copy-phys-reg-vr128-to-gr128.mir
new file mode 100644
index 00000000000000..4eccd49cae33bc
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/copy-phys-reg-vr128-to-gr128.mir
@@ -0,0 +1,76 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=s390x-ibm-linux -mcpu=z13 -run-pass=postrapseudos -o - %s | FileCheck %s
+
+---
+name: copy_vr128_to_gr128__v0_to_r0q
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $v0
+ ; CHECK-LABEL: name: copy_vr128_to_gr128__v0_to_r0q
+ ; CHECK: liveins: $v0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $r1d = VLGVG $v0, $noreg, 0, implicit-def $r0q
+ ; CHECK-NEXT: $r0d = VLGVG $v0, $noreg, 1
+ ; CHECK-NEXT: Return implicit $r0q
+ $r0q = COPY $v0
+ Return implicit $r0q
+...
+
+---
+name: copy_vr128_to_gr128__v0_to_r0q_killed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $v0
+ ; CHECK-LABEL: name: copy_vr128_to_gr128__v0_to_r0q_killed
+ ; CHECK: liveins: $v0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $r1d = VLGVG $v0, $noreg, 0, implicit-def $r0q
+ ; CHECK-NEXT: $r0d = VLGVG killed $v0, $noreg, 1
+ ; CHECK-NEXT: Return implicit $r0q
+ $r0q = COPY killed $v0
+ Return implicit $r0q
+...
+
+---
+name: copy_vr128_to_gr128__v0_to_r0q_undef
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: copy_vr128_to_gr128__v0_to_r0q_undef
+ ; CHECK: liveins: $v0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $r0q = KILL undef $v0
+ ; CHECK-NEXT: Return implicit $r0q
+ $r0q = COPY undef $v0
+ Return implicit $r0q
+...
+
+---
+name: copy_vr128_to_gr128__v0_to_r0q_undef_use_subreg0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: copy_vr128_to_gr128__v0_to_r0q_undef_use_subreg0
+ ; CHECK: liveins: $v0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $r0q = KILL undef $v0
+ ; CHECK-NEXT: Return implicit $r0d
+ $r0q = COPY undef $v0
+ Return implicit $r0d
+...
+
+---
+name: copy_vr128_to_gr128__v0_to_r0q_undef_use_subreg1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: copy_vr128_to_gr128__v0_to_r0q_undef_use_subreg1
+ ; CHECK: liveins: $v0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $r0q = KILL undef $v0
+ ; CHECK-NEXT: Return implicit $r1d
+ $r0q = COPY undef $v0
+ Return implicit $r1d
+...
|
I have no idea if this is correct and I probably swapped the element ordering somewhere.
a48e70d
to
994bdb1
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm wondering where you ran into this ... I don't think we've seen any issues due to this missing copy before.
I'm several levels deep on yak shaving and I'm trying to at least partially fix the hacky handling of i128 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM now, thanks!
I got an error with the verifier on this test case. Fixed with:
ok to commit? |
I think the correct fix is to add undef to the missing half. The liveness is supposed to be introduced by the copy here |
I am a little confused: If only 64-bits (one GPR) is defined and inserted, I would have thought a vector element load (VLVGG) would be used, but that would be something like a COPY from GR64 to VR64, except VR64 does not hold i64. Is it allowed to add an undef implicit use to the COPY source for the undef part..? copy_gr128_to_vr128__r0q_to_v0_undef: COPY undef $r0q: maybe don't put r0q in the liveins list? |
The point isn't to be optimal, the point is to ensure the liveness is tracked correctly
The point is the undef read on the def of the super register acts like a hidden IMPLICIT_DEF of the non-live half of the register |
Given abbd73e, should we similarly just remove the subreg tests as redundant here? |
I have no idea if this is correct and I probably swapped the element ordering somewhere.